#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Time    : 2018/2/8 0008 15:11
# @Author  : Arliki
# @email   : hkdnxycz@outlook.com
# @File    : movie
import scrapy
import re


class XszrSpider(scrapy.Spider):
    """Collect the row links from msj1.com archive pages.

    For every start URL, the ``href`` of the first link in each table row is
    appended, one per line, to ``<archive id>.txt`` in the current working
    directory, where ``<archive id>`` is the number in the page URL.
    """

    name = 'xszr'
    start_urls = [
        "http://www.msj1.com/archives/37.html",
        # "http://www.msj1.com/archives/40.html",
        # "http://www.msj1.com/archives/43.html",
        # "http://www.msj1.com/archives/46.html",
        # "http://www.msj1.com/archives/47.html",
        # "http://www.msj1.com/archives/3715.html",
        # "http://www.msj1.com/archives/4529.html",
        # "http://www.msj1.com/archives/5250.html"
    ]

    # Captures the numeric archive id from a ".../<id>.html" URL. ``\d+``
    # (not ``\d*``) so an empty id can never reach ``int()``.
    _ARCHIVE_ID_RE = re.compile(r'.*/(\d+)\.html')

    # Archive ids at or above this value are read from the first table inside
    # #content; lower ids are read from the second table.
    _FIRST_TABLE_MIN_ID = 47

    def parse(self, response):
        """Append the links found on one archive page to ``<archive id>.txt``.

        Args:
            response: The Scrapy response for an archive page. Its URL must
                contain ``/<digits>.html``; otherwise the page is skipped
                with a warning.

        Returns:
            None. The only effect is appending lines to the output file;
            nothing is written when the page yields no links.
        """
        match = self._ARCHIVE_ID_RE.search(response.url)
        if match is None:
            self.logger.warning(
                "No numeric archive id in URL %s; skipping", response.url)
            return
        archive_id = match.group(1)

        if int(archive_id) >= self._FIRST_TABLE_MIN_ID:
            table_index = 1
        else:
            table_index = 2
        # NOTE(review): <tbody> is frequently inserted by browsers and absent
        # from the raw HTML -- confirm this XPath matches the downloaded page.
        rows = response.xpath(
            '//*[@id="content"]/table[%d]/tbody/tr' % table_index)

        links = []
        for row in rows:
            # Looked up fresh for every row so that a row without a link is
            # skipped instead of re-emitting the previous row's href.
            href = row.xpath('td/a/@href').extract_first()
            if href:
                links.append(href)

        if not links:
            return
        # Open the file once per page rather than once per table row.
        with open('%s.txt' % archive_id, 'a', encoding='utf8') as out:
            out.writelines("%s\n" % href for href in links)
